1 Data upload

2 Time management

3 ND removed, Converted to a Dataframe

#replace ND with 0

tr <- matrix(data = NA, ncol = ncol(dt[,c(1:46)]), nrow=nrow(dt))
colnames(tr) <- colnames(dt[,c(1:46)])
for (i in 12:46)
{
  tr[,c(i)] <- gsub(".*ND.*", 0, dt[,i])
}

for(i in 1:11)
{
  tr[,c(i)] <- dt[,c(i)]
}
   

#transform to dataframe
tr <- as.data.frame.matrix(tr) #A correct command to change the dataset to dataframe after transformations
tr[,12:44] <- sapply(tr[,12:44],as.numeric) # Change a character to numeric (double)
typeof(tr$Cu_concentration) # confirm the value is no longer a character
## [1] "double"

4 Head of the dataset

head(tr)
Data frame is now printed using kable.
Scientific_Name Group Plot Sample_Name Tube_No Type_of_Sample Total_Weight Cup_No pXRF_measurement_ID File Material Cl_concentration Cl_uncertainty Ca_concentration Ca_uncertainty Ti_concentration Ti_uncertainty Cr_concentration Cr_uncertainty Mn_concentration Mn_uncertainty Fe_concentration Fe_uncertainty Co_concentration Co_uncertainty Ni_concentration Ni_uncertainty Cu_concentration Cu_uncertainty Zn_concentration Zn_uncertainty As_concentration As_uncertainty Se_concentration Se_uncertainty Cd_concentration Cd_uncertainty Re_concentration Re_uncertainty Hg_concentration Hg_uncertainty Tl_concentration Tl_uncertainty Pb_concentration Pb_uncertainty Substrate_RT
Allionia incarnata G3 P1 P1_29_1 30;31 leaf 0.55 17 2126 Scan2126_19.14.hdx Plant 580 268 48102 704 143.0 27.5 7.7 5.5 36.2 10.0 1037 33.0 0 2.5 0 1.2 37.5 4.5 0.0 1.1 5.0 0.3 1.0 0.6 0.0 2.5 0.0 1.5 0 0.4 0 0.6 0 0.9 0.057585236
Allionia incarnata G3 P1 P1_30 28 leaf 0.166 18 2127 Scan2127_19.19.hdx Plant 306 178 22621 439 132.0 22.5 4.8 4.0 47.8 10.7 1709 46.9 0 3.3 0 1.2 8.1 3.2 11.8 3.8 2.5 1.5 0.0 0.6 0.0 4.7 0.0 2.1 0 0.6 0 0.7 0 2.5 0.014363615
Allionia incarnata G3 P1 P1_29_1_2 29 leaf 0.213 19 2128 Scan2128_19.25.hdx Plant 527 259 47147 698 124.0 25.7 10.9 6.2 25.2 9.1 866 30.8 0 2.3 0 1.2 35.9 5.0 0.0 1.2 6.5 0.4 0.8 0.6 0.0 3.8 0.0 1.9 0 0.5 0 0.5 0 1 0.034562402
Allionia incarnata G3 P2 P2_E12 33;34 leaf 0.332 20 2129 Scan2129_19.30.hdx Plant 2576 462 37856 611 146.0 26.3 10.9 6.0 30.7 10.0 1320 35.8 0 2.5 0 1.2 47.1 5.1 0.0 1.8 6.2 0.4 2.6 0.7 0.0 4.5 4.6 2.6 0 0.5 0 0.5 0 0.8 0.054200765
Allionia incarnata G3 P2 P2_E12_1 32 leaf 0.183 21 2130 Scan2130_19.35.hdx Plant 4756 619 29095 530 90.3 20.5 10.6 5.6 0.0 5.8 668 26.6 0 2.0 0 1.2 26.1 4.6 0.0 1.5 4.4 1.2 2.7 1.0 20.1 20.1 6.4 3.4 0 0.7 0 0.9 0 1.9 0.024824264
Allionia incarnata G3 P2 P2_E12_2 26 leaf 0.164 22 2131 Scan2131_19.39.hdx Plant 753 234 6209 233 30.6 11.5 0.0 2.5 0.0 5.1 371 23.4 0 2.2 0 1.5 7.9 3.2 0.0 1.7 3.3 1.6 3.6 1.5 0.0 14.2 0.0 2.9 0 0.8 0 1.2 0 2.8 0.011043405

5 Subsets and wrangling

#Filtering with tydeverse library
dt_plants <- filter(tr,  Scientific_Name != 'QA_Sample')

P1 <- filter(dt_plants, Plot == "P1")
P2 <- filter(dt_plants, Plot == "P2")
P5 <- filter(dt_plants, Plot == "P5")
P6 <- filter(dt_plants, Plot == "P6")
P125 <- filter(dt_plants, Plot != "P6")

Se_best <- subset(dt_plants, Scientific_Name == 'Isocoma cf. tenuisecta' | Scientific_Name == 'Populus fremontii' | Scientific_Name == 'Senegalia (Acacia) greggii' )

Re_best <- subset(dt_plants, Scientific_Name == 'Isocoma cf. tenuisecta' | Scientific_Name == 'Baccharis sarothroides' | Scientific_Name == 'Senegalia (Acacia) greggii'| Scientific_Name == 'Nultuma (Prosopis) velutina' | Scientific_Name == 'Mimosa biuncifera (=aculeaticarpa)' | Scientific_Name == 'Fraxinus velutina'| Scientific_Name == 'Datura wrightii' )


# Dropping uncertainty columns for PCA analysis

dt_plants_nounc = select(dt_plants, -Cl_uncertainty,-Ca_uncertainty, -Ti_uncertainty,
                         -Cr_uncertainty, -Mn_uncertainty, -Fe_uncertainty, -Ni_uncertainty, -Cu_uncertainty,
                         -Zn_uncertainty, -As_uncertainty, -Se_uncertainty, -Cd_uncertainty, -Re_uncertainty, -Hg_uncertainty, -Co_uncertainty,
                         -Tl_uncertainty, -Pb_uncertainty, -Substrate_RT)

dt_plants_nounc = select(dt_plants_nounc, -Hg_concentration, -Tl_concentration, -Pb_concentration, -Ni_concentration, -Co_concentration)


#Filtering plants By Plot with subset function

dt_plants_nounc1 <- subset(dt_plants_nounc, Plot=="P1")
dt_plants_nounc2 <- subset(dt_plants_nounc, Plot=="P2")
dt_plants_nounc5 <- subset(dt_plants_nounc, Plot=="P5")
dt_plants_nounc6 <- subset(dt_plants_nounc, Plot=="P6")
dt_plants_nounce15 <- subset(dt_plants_nounc, Plot=="P1" | Plot=="P5")
dt_plants_nounce125 <- subset(dt_plants_nounc, Plot=="P1" | Plot=="P5" | Plot=="P2")

#Removing _concentration from column names

colnames(dt_plants_nounce125)[12] <- "Cl"
colnames(dt_plants_nounce125)[13] <- "Ca"
colnames(dt_plants_nounce125)[14] <- "Ti"
colnames(dt_plants_nounce125)[15] <- "Cr"
colnames(dt_plants_nounce125)[16] <- "Mn"
colnames(dt_plants_nounce125)[17] <- "Fe"
colnames(dt_plants_nounce125)[18] <- "Cu"
colnames(dt_plants_nounce125)[19] <- "Zn"
colnames(dt_plants_nounce125)[20] <- "As"
colnames(dt_plants_nounce125)[21] <- "Se"
colnames(dt_plants_nounce125)[22] <- "Cd"
colnames(dt_plants_nounce125)[23] <- "Re"

colnames(dt_plants_nounc6)[12] <- "Cl"
colnames(dt_plants_nounc6)[13] <- "Ca"
colnames(dt_plants_nounc6)[14] <- "Ti"
colnames(dt_plants_nounc6)[15] <- "Cr"
colnames(dt_plants_nounc6)[16] <- "Mn"
colnames(dt_plants_nounc6)[17] <- "Fe"
colnames(dt_plants_nounc6)[18] <- "Cu"
colnames(dt_plants_nounc6)[19] <- "Zn"
colnames(dt_plants_nounc6)[20] <- "As"
colnames(dt_plants_nounc6)[21] <- "Se"
colnames(dt_plants_nounc6)[22] <- "Cd"
colnames(dt_plants_nounc6)[23] <- "Re"

6 Data Visualization

6.1 Boxplots - Cu - All plots

Cu_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Cu_concentration, FUN = median), y = Cu_concentration, group=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  #theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 0))+
  geom_jitter(aes(colour = Plot), size=1) +
  ylim(0,600)+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Cu_AllPlots

6.2 Boxplots - Re - All Plots

Re_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Re_concentration, FUN = median), y = Re_concentration, group=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  #theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 0))+
  geom_jitter(aes(colour = Plot), size=1) +
  #ylim(0,600)+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Re_AllPlots

6.3 Boxplots - Re - Selected species

Re_box <- ggplot(Re_best, aes(x = reorder(Scientific_Name, Re_concentration, FUN = median), y = Re_concentration, fill=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 45))+
  geom_jitter(color="#85b8bc", size=2, alpha=0.9) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  #scale_fill_manual(values = c("", "", "", "", "", "","" ))
  scale_fill_manual(values = c("#4b2866", "#c7abdd", "#a578c9", "#381e4c", "#8347b2", "#5d327f","#251433" ))
  #scale_fill_brewer(palette = "Greens")

Re_box

6.4 Boxplots - Zn - All Plots

Zn_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Zn_concentration, FUN = median), y = Zn_concentration, group=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  #theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 0))+
  geom_jitter(aes(colour = Plot), size=1) +
  #ylim(0,600)+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Zn_AllPlots

6.5 Boxplots - Se - All Plots

Se_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Se_concentration, FUN = median), y = Se_concentration, group=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  #theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 0))+
  geom_jitter(aes(colour = Plot), size=1) +
  ylim(0,60)+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Se_AllPlots

6.6 Boxplots - Se - Selected species

Se_box <- ggplot(Se_best, aes(x = reorder(Scientific_Name, Se_concentration, FUN=median), y = Se_concentration, fill=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 45))+
  geom_jitter(color="#85b8bc", size=3, alpha=0.9) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual(values = c("#251433", "#c7abdd", "#8347b2"))
Se_box

6.7 Boxplots - Cu - Plot 6

Plants collected at the plot 6 were growing directly on the mine tailings that were exposed on the area of 100 x 100 m. Shrubs were also collected in the close vicinity to the tailings given their rooting depths.

Plot 6

Cu_Plot6 <- ggplot(P6, aes(x = reorder(Scientific_Name, Cu_concentration, FUN = median), y = Cu_concentration, group=Scientific_Name)) +
  geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
  #theme(legend.position = "none")+
  scale_x_discrete(guide = guide_axis(angle = 0))+
  geom_jitter(aes(colour = Plot), size=1.6) +
  ylim(0,600)+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  #scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E", "#38A6A5", "#73AF48", "#EDAD08", "#CC503E", "#38A6A5", "#73AF48", "#EDAD08"))
Cu_Plot6

7 Soil Table

8 PCA Analysis

8.1 Creating principal components

require(stats)
myPr1 <- prcomp(dt_plants_nounc1[,12:23], scale=TRUE)
myPr2 <- prcomp(dt_plants_nounc2[,12:23], scale=TRUE)
myPr5 <- prcomp(dt_plants_nounc5[,12:23], scale=TRUE)
myPr6 <- prcomp(dt_plants_nounc6[,12:23], scale=TRUE)
myPr15 <- prcomp(dt_plants_nounce15[,12:23], scale=TRUE)
myPr125 <- prcomp(dt_plants_nounce125[,12:23], scale=TRUE) # it was not working because the scale was FALSE

8.2 Biplots1

biplot(myPr1, scale=0)

biplot(myPr125, scale=0)

8.3 Biplots2 - Plot 1, 2 and 5

biplot125 <- biplot(myPr125,
             col=c('blue', 'red'),
             cex=c(0.8, 0.8),
             xlim=c(-.4, .4),
             main='PCA Results',
             expand=1.2)

8.4 Biplots2 - Plot 6

biplot6 <-  biplot(myPr6,
            col=c('blue', 'red'),
            cex=c(0.8, 0.8),
            xlim=c(-.4, .4),
            main='PCA Results',
            expand=1.2)

8.5 Bind dataframes with PC1 and PC2 for each plot

dt_plants1 <- cbind(dt_plants_nounc1, myPr1$x[,1:2])
dt_plants2 <- cbind(dt_plants_nounc2, myPr2$x[,1:2])
dt_plants5 <- cbind(dt_plants_nounc5, myPr5$x[,1:2])
dt_plants6 <- cbind(dt_plants_nounc6, myPr6$x[,1:2])
dt_plants15 <- cbind(dt_plants_nounce15, myPr15$x[,1:2])

8.6 PCA All plots

# Plot for all plot
myPr_all <- prcomp(dt_plants_nounc[,12:23], scale=TRUE)
dt_plants_all <- cbind(dt_plants_nounc, myPr_all$x[,1:2])

ggplot(dt_plants_all, aes(PC1, PC2, col=Plot, fill=Plot))+
  stat_ellipse(geom="polygon", col="black", alpha=0.5)+
  theme_classic()+
  geom_point(shape=21, col="black")

8.7 Variances across principle components

plot(myPr125, type="l")

summary(myPr1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5    PC6     PC7
## Standard deviation     1.7892 1.6134 1.2723 1.09515 1.00262 0.8604 0.71435
## Proportion of Variance 0.2668 0.2169 0.1349 0.09995 0.08377 0.0617 0.04252
## Cumulative Proportion  0.2668 0.4837 0.6186 0.71853 0.80230 0.8640 0.90652
##                            PC8     PC9   PC10    PC11    PC12
## Standard deviation     0.63622 0.59865 0.4436 0.30908 0.25739
## Proportion of Variance 0.03373 0.02987 0.0164 0.00796 0.00552
## Cumulative Proportion  0.94025 0.97012 0.9865 0.99448 1.00000

9 Partial Least Square Discriminant Analysis (PLS-DA)

library(readr)
library(dplyr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:reshape':
## 
##     expand, smiths
## The following object is masked from 'package:reshape2':
## 
##     smiths
library(ropls)




dt_plants_nounc_3 <- dt_plants_nounc |> select(-Scientific_Name, -Group, -Plot, -Sample_Name, -Tube_No, -Type_of_Sample, -Cup_No, -pXRF_measurement_ID, -File, -Material)

typeof(dt_plants_nounc_3$Total_Weight)
## [1] "character"
dt_plants_nounc_3[,1] <- sapply(dt_plants_nounc_3[,1],as.numeric)

dt_nounc_PCA <- opls(x=dt_plants_nounc_3)
## PCA
## 226 samples x 13 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.558   3   0

plot(dt_nounc_PCA)

plot(dt_nounc_PCA, typeVc ="x-score", parAsColFcVn=dt_plants_nounc$Plot)

dt_opls <-opls(dt_plants_nounc_3, dt_plants_nounc$Plot)
## PLS-DA
## 226 samples x 13 variables and 1 response
## standard scaling of predictors and response(s)
##       R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y  pQ2
## Total     0.59    0.179   0.122 0.398   4   0 0.05 0.05

summary(dt_opls)
## Length  Class   Mode 
##      1   opls     S4
plot(dt_nounc_PCA, typeVc ="x-score", parAsColFcVn=dt_plants_nounc$Cu)

dt_opls <-opls(dt_plants_nounc_3, dt_plants_nounc$Cu)
## PCA
## 226 samples x 13 variables
## standard scaling of predictors
##       R2X(cum) pre ort
## Total    0.558   3   0